Load the packages we need.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1 √ purrr 0.2.4
## √ tibble 1.3.4 √ dplyr 0.7.4
## √ tidyr 0.7.2 √ stringr 1.2.0
## √ readr 1.1.1 √ forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.2
## Warning: package 'purrr' was built under R version 3.4.2
## Warning: package 'dplyr' was built under R version 3.4.2
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(janitor)
library(ggridges)
library(ggthemes)
# Global defaults for figures embedded in the R Markdown output:
# every chunk draws a 12 x 8 figure that is displayed at 80% width.
knitr::opts_chunk$set(
  fig.width = 12,
  fig.height = 8,
  out.width = "80%"
)

# Use the black-and-white ggplot2 theme for all plots that follow.
theme_set(theme_bw())
First, we import each sheet of the workbook and clean its column names.
# All tables below come from different sheets of the same Excel workbook.
atlas_path <- "../food_enviroment_atlas.xls"

# Read one sheet of the workbook and standardise its column names
# with janitor::clean_names().
read_atlas_sheet <- function(sheet_name) {
  clean_names(readxl::read_xls(atlas_path, sheet = sheet_name))
}

# Each table keeps a positional subset of its sheet's columns;
# `county` keeps every column.
health        <- read_atlas_sheet("HEALTH") %>% select(1:7)
socioeconomic <- read_atlas_sheet("SOCIOECONOMIC") %>% select(1:3, 10:18)
assistance    <- read_atlas_sheet("ASSISTANCE") %>% select(1:3, 23:29)
restaurant    <- read_atlas_sheet("RESTAURANTS") %>% select(1:9, 16:17)
county        <- read_atlas_sheet("Supplemental Data - County")
state         <- read_atlas_sheet("Supplemental Data - State") %>%
  select(1:2, 9:14, 33:40)
store         <- read_atlas_sheet("STORES") %>% select(1:27)
# State-level health summary: for each state, the unweighted mean across its
# counties of the adult diabetes and obesity percentages (2008 and 2013).
#
# na.rm = TRUE is required because the county-level columns contain missing
# values; without it a single NA county turns the whole state's mean into NA.
health_state <- health %>%
  group_by(state) %>%
  summarise(
    pct_diabetes_adults08 = mean(pct_diabetes_adults08, na.rm = TRUE),
    pct_diabetes_adults13 = mean(pct_diabetes_adults13, na.rm = TRUE),
    pct_obese_adults08    = mean(pct_obese_adults08, na.rm = TRUE),
    pct_obese_adults13    = mean(pct_obese_adults13, na.rm = TRUE)
  )
# State-level socioeconomic summary: for each state, the unweighted mean
# across its counties of each county-level indicator.
#
# na.rm = TRUE is required because the county-level columns contain missing
# values; without it a single NA county turns the whole state's mean into NA.
#
# perpov10 / perchldpov10: presumably 0/1 county flags for persistent
# (child) poverty -- TODO confirm against the data dictionary. Their mean is
# then the share of counties flagged. The previous version divided that mean
# by n() as well, scaling by the county count a second time and making the
# values incomparable between states with different numbers of counties.
socioeconomic_state <- socioeconomic %>%
  group_by(state) %>%
  summarise(
    pct_65older10   = mean(pct_65older10, na.rm = TRUE),
    pct_18younger10 = mean(pct_18younger10, na.rm = TRUE),
    medhhinc15      = mean(medhhinc15, na.rm = TRUE),
    povrate15       = mean(povrate15, na.rm = TRUE),
    childpovrate15  = mean(childpovrate15, na.rm = TRUE),
    perpov10        = mean(perpov10, na.rm = TRUE),
    perchldpov10    = mean(perchldpov10, na.rm = TRUE)
  )
# County-level table: socioeconomic and health columns side by side, matched
# on FIPS code, state and county. merge() keeps only the rows whose keys
# appear in both inputs (its default, all = FALSE).
social_health_whole <- merge(
  x = socioeconomic,
  y = health,
  by = c("fips", "state", "county")
)

# State-level table: the two per-state summaries matched on state.
social_health <- merge(
  x = socioeconomic_state,
  y = health_state,
  by = "state"
)
# Quick visual check of the county-level distributions of the 2013 adult
# obesity and diabetes percentages. The original note describes them as
# normally distributed; that is a reading of the histograms, not a formal test.
hist(health$pct_obese_adults13)
hist(health$pct_diabetes_adults13)
# Median household income (2015) vs. adult obesity (2013): one point per
# county, coloured by state, with a smoothed trend line over all counties.
# No group_by() is needed here: ggplot2 does not use dplyr groups.
social_health_whole %>%
  ggplot(aes(x = medhhinc15, y = pct_obese_adults13)) +
  # size and alpha are fixed values, so they are set outside aes(). A
  # constant inside aes() is mapped through the size scale and adds a
  # meaningless "size" legend instead of setting the point size.
  geom_point(aes(color = state), size = 1, alpha = .6) +
  geom_smooth() +
  labs(
    x = "Median household income, 2015",
    y = "Percentage of adult obesity, 2013 "
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
## `geom_smooth()` using method = 'gam'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
# Median household income (2015) vs. adult diabetes (2013): one point per
# county, coloured by state, with a smoothed trend line over all counties.
# No group_by() is needed here: ggplot2 does not use dplyr groups.
social_health_whole %>%
  ggplot(aes(x = medhhinc15, y = pct_diabetes_adults13)) +
  # size and alpha are fixed values, so they are set outside aes(). A
  # constant inside aes() is mapped through the size scale and adds a
  # meaningless "size" legend instead of setting the point size.
  geom_point(aes(color = state), size = 1, alpha = .6) +
  geom_smooth() +
  labs(
    x = "Median household income, 2015",
    y = "Percentage of adult diabetes, 2013 "
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
## `geom_smooth()` using method = 'gam'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
# Adult obesity (2013) vs. adult diabetes (2013): one point per county,
# coloured by state, with a smoothed trend line over all counties.
# No group_by() is needed here: ggplot2 does not use dplyr groups.
social_health_whole %>%
  ggplot(aes(x = pct_obese_adults13, y = pct_diabetes_adults13)) +
  # size and alpha are fixed values, so they are set outside aes(). A
  # constant inside aes() is mapped through the size scale and adds a
  # meaningless "size" legend instead of setting the point size.
  geom_point(aes(color = state), size = 1, alpha = .6) +
  geom_smooth() +
  labs(
    x = "Percentage of adult obesity, 2013",
    y = "Percentage of adult diabetes, 2013 "
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
## `geom_smooth()` using method = 'gam'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).